Load packages and data
library(tidyverse)
library(ggplot2)
library(plotly)
library(RColorBrewer)
library(htmltools)
# Read the raw dataset and keep only the rows whose Taille_Classe is below 50
# (presumably the class size -- TODO confirm the meaning of the cut-off).
rawData <- read.csv(file = "./data/data_depp_clean3.csv")
# which() discards rows where the comparison is NA; indexing with the bare
# logical vector would instead insert an all-NA row for each of them.
rawData <- rawData[which(rawData$Taille_Classe < 50), ]
# Threshold table; one of its rows is later selected and its columns are
# looked up by outcome column name.
rawThresholds <- read.csv(file = "./data/seuils.csv")
# Compute false- and true-positive rates for a set of candidate cut-offs.
#
# An observation is "flagged" at cut-off x[k] when Train <= x[k], and it is a
# "positive" case when Outcome <= Threshold.
#
# Arguments:
#   x         Numeric vector of candidate cut-offs applied to Train.
#   Train     Numeric vector of predictor scores, one per observation.
#   Outcome   Numeric vector of outcome scores, same length as Train.
#   Threshold Single number; outcomes at or below it count as positives.
#
# Returns a numeric 2 x length(x) matrix:
#   row 1 = share of negatives (Outcome > Threshold) that are flagged (FPR),
#   row 2 = share of positives (Outcome <= Threshold) that are flagged (TPR).
# A rate is NaN when its denominator is zero (no negatives / no positives).
# An empty x yields a 2 x 0 matrix.
calc_T_F_PR <- function(x, Train, Outcome, Threshold) {
  # The outcome indicator does not depend on the cut-off: compute it once.
  isPositive <- as.numeric(Outcome <= Threshold)
  isNegative <- 1 - isPositive
  nPositive <- sum(isPositive)
  nNegative <- sum(isNegative)

  y <- matrix(NA_real_, nrow = 2, ncol = length(x))
  # seq_along() iterates zero times for an empty x, whereas 1:length(x)
  # would run over c(1, 0) and fail on the out-of-bounds column.
  for (k in seq_along(x)) {
    flagged <- as.numeric(Train <= x[k])
    y[1, k] <- sum(flagged * isNegative) / nNegative
    y[2, k] <- sum(flagged * isPositive) / nPositive
  }
  y
}
# Predictor columns: every column of rawData whose name contains "T1".
# drop = FALSE keeps the result a data frame even when a single column
# matches, so that ncol() and names() keep working on it.
dataTrain <- rawData[, grepl("T1", names(rawData)), drop = FALSE]
# Outcome columns: every column of rawData whose name contains "T3".
dataOutcome <- rawData[, grepl("T3", names(rawData)), drop = FALSE]
# Row of rawThresholds used to look up the outcome thresholds.
whichThreshold <- 1
# Collects one plot per outcome column.
plotlist <- list()
# Build one interactive plot per outcome column. Each plot shows, for every
# predictor column, the (FPR, TPR) pairs obtained by using each distinct
# predictor value as a cut-off.
for (j in seq_len(ncol(dataOutcome))) {
  # The threshold depends only on the outcome column, so look it up once
  # per outcome rather than once per predictor.
  outcomeName <- names(dataOutcome)[j]
  if (!outcomeName %in% names(rawThresholds)) {
    stop("No threshold column named '", outcomeName, "' in rawThresholds",
         call. = FALSE)
  }
  outcomeThreshold <- rawThresholds[whichThreshold, outcomeName]

  # Collect the per-predictor rate tables in a preallocated list and bind
  # them once, instead of growing a data frame with rbind() on every pass.
  ratiosParts <- vector("list", ncol(dataTrain))
  for (i in seq_len(ncol(dataTrain))) {
    # Candidate cut-offs: the distinct observed values of the predictor.
    xAx <- sort(unique(dataTrain[, i]))
    roc <- calc_T_F_PR(xAx, dataTrain[, i], dataOutcome[, j], outcomeThreshold)
    ratiosParts[[i]] <- data.frame(
      test = rep(names(dataTrain)[i], times = length(xAx)),
      TPR = roc[2, ],
      FPR = roc[1, ]
    )
  }
  # The empty prototype guarantees the expected columns even when there is
  # nothing to bind.
  ratios <- do.call(
    rbind,
    c(
      list(data.frame(test = character(), TPR = numeric(), FPR = numeric())),
      ratiosParts
    )
  )

  plotlist[[j]] <- ggplotly(
    ggplot(ratios) +
      geom_point(aes(x = FPR, y = TPR, color = test)) +
      geom_line(aes(x = FPR, y = TPR, color = test)) +
      xlim(0, 1) +
      ylim(0, 1) +
      xlab("False positive rate") +
      ylab("True positive rate") +
      ggtitle(outcomeName)
  )
}
# Emit all plots as a single unnamed tag list.
tagList(setNames(plotlist, NULL))